suppressPackageStartupMessages(source("/home/guz/project/development/cola/load.R"))

Data is from https://tcga-data.nci.nih.gov/docs/publications/gbm_exp/.

# Expression values: the first column of the file supplies the row names,
# and the table is converted to a matrix straight away.
data <- as.matrix(read.table(
    "/icgc/dkfzlsdf/analysis/B080/guz/subgroup_test/unifiedScaled.txt",
    header = TRUE, row.names = 1, check.names = FALSE
))

# Subtype labels: taken from the first row of the annotation table, skipping
# its first two columns; each label is named by its column header.
subtype <- local({
    annotation <- read.table(
        "/icgc/dkfzlsdf/analysis/B080/guz/subgroup_test/TCGA_unified_CORE_ClaNC840.txt",
        sep = "\t", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE
    )
    keep <- -(1:2)
    setNames(unlist(annotation[1, keep]), colnames(annotation)[keep])
})

# Keep only the columns that carry a subtype label, in the order of the labels.
data <- data[, names(subtype)]
dim(data)
## [1] 11861   173
table(subtype)
## subtype
##   Classical Mesenchymal      Neural   Proneural 
##          38          56          26          53

Get all supported top methods and partition methods:

# Both calls take no arguments; the value each one returns is echoed in the
# `##` line directly beneath it.
ALL_TOP_VALUE_METHOD()
## [1] "sd"  "vc"  "MAD" "AAC"
ALL_PARTITION_METHOD()
## [1] "hclust"  "kmeans"  "skmeans" "Mclust"  "clara"   "pam"     "cclust"

Run clustering for all combinations of methods in batch:

# Location of a previously saved result object.
# NOTE(review): the file name ("p0.8") hints at settings that are not visible
# in the run_all() call below -- confirm the cache matches before relying on it.
res_list_rds <- "/icgc/dkfzlsdf/analysis/B080/guz/subgroup_test/TCGA_subgroup_p0.8.rds"

# Previously the batch run was executed and its result immediately overwritten
# by readRDS(). Load the saved object when it is available and only fall back
# to the (multi-core) batch run when it is not.
if (file.exists(res_list_rds)) {
    res_list <- readRDS(res_list_rds)
} else {
    res_list <- run_all(data, top_n = c(2000, 4000, 6000), k = 2:6, known = subtype, mc.cores = 4)
}
res_list
## Top rows are extracted by 'sd, vc, MAD, AAC' methods.
## Subgroups are detected by 'hclust, kmeans, skmeans, Mclust, clara, pam, cclust' method.
## Number of partitions are tried for k = 2, 3, 4, 5, 6

Collect all plots for a given k:

# plot_ecdf is passed as the plotting function; unlike the other
# collect_plots() calls on res_list in this file, no k is given here.
collect_plots(res_list, fun = plot_ecdf)

plot of chunk unnamed-chunk-6

# consensus_heatmap is passed as the plotting function, with k fixed at 3.
collect_plots(res_list, k = 3, fun = consensus_heatmap)

plot of chunk unnamed-chunk-6

# membership_heatmap is passed as the plotting function, with k fixed at 3.
collect_plots(res_list, k = 3, fun = membership_heatmap)

plot of chunk unnamed-chunk-6

# get_signatures is passed as the plotting function, with k fixed at 3.
collect_plots(res_list, k = 3, fun = get_signatures)

plot of chunk unnamed-chunk-6

Overlap of top rows in different top methods:

# Draw the three overlap plots side by side (one row, three columns).
# par() returns the previous settings; keep them so the layout can be put
# back afterwards instead of leaking into every later plot.
old_par <- par(mfrow = c(1, 3))
top_rows_overlap(res_list, top_n = 2000)
top_rows_overlap(res_list, top_n = 4000)
top_rows_overlap(res_list, top_n = 6000)
par(old_par)

plot of chunk unnamed-chunk-7

Also visualize the correspondence of rankings between different scoring methods:

# NOTE(review): "correspondance" is the exact string passed as `type`;
# presumably the function matches on this spelling, so do not "correct" it
# here without checking the function's accepted values.
top_rows_overlap(res_list, top_n = 2000, type = "correspondance")

plot of chunk unnamed-chunk-8

Heatmaps for the top rows:

# top_n = 2000 is one of the top_n values given to run_all() earlier in
# this file.
top_rows_heatmap(res_list, top_n = 2000)

plot of chunk unnamed-chunk-9

plot of chunk unnamed-chunk-9

plot of chunk unnamed-chunk-9

plot of chunk unnamed-chunk-9

Get clustering in a specified combination of top method and partition method:

# Pull one method combination out of the batch result: partitioning by
# "skmeans" on rows ranked by "AAC". The object is then echoed.
res <- get_single_run(
    res_list,
    partition_method = "skmeans",
    top_method = "AAC"
)
res
## top rows are extracted by 'AAC' method.
## Subgroups are detected by 'skmeans' method.
## Number of partitionings are tried for k = 2, 3, 4, 5, 6

Collect all plots for this single run:

# Here collect_plots() receives the single-run object `res`, with neither
# `k` nor `fun` supplied.
collect_plots(res)

plot of chunk unnamed-chunk-11

Individual plots:

# NOTE(review): called without k; presumably this is the view used to choose
# the k = 3 that the following calls in this file pass -- confirm.
select_k(res)

plot of chunk unnamed-chunk-12

# Called directly on the single-run object `res`, with k fixed at 3.
consensus_heatmap(res, k = 3)

plot of chunk unnamed-chunk-12

# Called directly on the single-run object `res`, with k fixed at 3.
membership_heatmap(res, k = 3)

plot of chunk unnamed-chunk-12

# Called directly on the single-run object `res`, with k fixed at 3; the
# return value is not assigned.
get_signatures(res, k = 3)

plot of chunk unnamed-chunk-12

Get classifications

# Class assignment for k = 3; only the first six rows are echoed below.
class_df <- get_class(res, k = 3)
head(class_df, n = 6L)
##                     p1 p2 p3 silhouette class
## TCGA-02-0003-01A-01  0  1  0   0.977354     2
## TCGA-02-0010-01A-01  0  1  0   0.977354     2
## TCGA-02-0011-01B-01  0  1  0   0.977354     2
## TCGA-02-0014-01A-01  0  1  0   0.977354     2
## TCGA-02-0024-01B-01  0  1  0   0.977354     2
## TCGA-02-0026-01B-01  0  1  0   0.977354     2

MDS or t-SNE plots:

# No `method` is given, so the function's default is used.
# NOTE(review): presumably MDS, going by the caption above -- confirm.
dimension_reduction(res, k = 3)

plot of chunk unnamed-chunk-14

# Same object and k as the call without `method`, but with method = "tsne"
# selected explicitly.
dimension_reduction(res, k = 3, method = "tsne")

plot of chunk unnamed-chunk-14

Consistency of classes.

# Called on the full batch result, with k fixed at 3.
collect_classes(res_list, k = 3)

plot of chunk unnamed-chunk-15

# Called on the single-run object `res`; no k is passed.
collect_classes(res)

plot of chunk unnamed-chunk-15